************************************************************;
***figure3_6.sas                                         ***;
************************************************************;
***This program creates data for Figures 3-6 in the      ***;
***Davis, Grim, Haltiwanger, and Streitwieser RESTAT     ***;
***paper.                                                ***;
************************************************************;
***Output:                                               ***;
*** csv/figure3.csv                                      ***;
*** csv/figure4.csv                                      ***;
*** csv/figure4_nf.csv                                   ***;
*** csv/figure5.csv                                      ***;
*** csv/figure6.csv                                      ***;
***Datasets for later use:                               ***;
*** elec.outurall                                        ***;
*** elec.outurall_nf                                     ***;
************************************************************;

options ls=80 compress=yes obs=max;

***START MACRO CODE***;

*Macro to get coefficients from GLM into wide dataset;

%macro est(indvar);

data glmest&indvar&year (keep = mgit coeff_&indvar);
	set GLMestimates;
	if parameter = "&indvar";
	coeff_&indvar = estimate;
	mgit = 1;

proc sort data=glmest&indvar&year;
	by mgit;

%mend est;

*Macro to run GLM regression with big utility effects;

%macro doit1(year);

data exutil&year;
	set gs6300;
	if year = &year;	

proc sort data=exutil&year;
	by bigutil;

*Use proc glm to run regression;
*Use ods output to get coefficients;

ods output ParameterEstimates (match_all persist=proc)
	=GLMestimates;

proc glm data=exutil&year;
	class bigutil;
	weight wttvs;
	model lpe_gdp = pe5 pe4 pe3 pe2 logpe bigutil / solution;
	output out=glm&year residual=resid predicted=phat;


%*Get coefficients;

%est(pe5)
%est(pe4)
%est(pe3)
%est(pe2)
%est(logpe)
%est(Intercept)

data glmest&year;
	merge glmestpe5&year glmestpe4&year glmestpe3&year glmestpe2&year 
		glmestlogpe&year glmestintercept&year;
	by mgit;

proc print data=glmest&year;
	title1 "Check glmest&year";

ods output close;
run;
quit;

proc datasets lib=work;
	delete glmestpe5&year glmestpe4&year glmestpe3&year glmestpe2&year
		glmestlogpe&year glmestintercept&year
		GLMestimates;

data glm&year (keep = ppn resid phat);
	set glm&year;

proc sort data=glm&year;
	by ppn;

proc sort data=exutil&year;
	by ppn;

data exutil&year;
	merge exutil&year glm&year;
	by ppn;
	year = &year;
	mgit = 1;

proc sort data=exutil&year;
	by mgit;

*Merge coefficients on as well;

data outur&year;
	merge exutil&year glmest&year;
	by mgit;

%mend doit1;

*Macro to run regression with NO big utility effects;

%macro doit2(year);

data exutil&year;
	set gs6300;
	if year = &year;	

*Use proc glm to run regression;
*Use ods output to get coefficients;

ods output ParameterEstimates (match_all persist=proc)
	=GLMestimates;

proc glm data=exutil&year;
	weight wttvs;
	model lpe_gdp = pe5 pe4 pe3 pe2 logpe / solution;
	output out=glm&year residual=resid predicted=phat;

%*Get coefficients into wide dataset;

%est(pe5)
%est(pe4)
%est(pe3)
%est(pe2)
%est(logpe)
%est(Intercept)

data glmest&year;
	merge glmestpe5&year glmestpe4&year glmestpe3&year glmestpe2&year 
		glmestlogpe&year glmestintercept&year;
	by mgit;

proc print data=glmest&year;
	title1 "Check glmest&year";

ods output close;
run;
quit;

proc datasets lib=work;
	delete glmestpe5&year glmestpe4&year glmestpe3&year glmestpe2&year
		glmestlogpe&year glmestintercept&year
		GLMestimates;

data glm&year (keep = ppn resid phat);
	set glm&year;

proc sort data=glm&year;
	by ppn;

proc sort data=exutil&year;
	by ppn;

data exutil&year;
	merge exutil&year glm&year;
	by ppn;
	year = &year;
	mgit = 1;

proc sort data=exutil&year;
	by mgit;

%*Merge coefficients on as well;

data outur&year;
	merge exutil&year glmest&year;
	by mgit;

%mend doit2;

*Macro to calculate weighted mean elasticities;
	
%macro d1(ds, wt, var);

proc sort data=&ds;
	by year;

proc summary data=&ds noprint;
	by year;
	var &var;
	weight wt&wt;
	output out=mean_&var._&wt
	mean(&var) = mean_&var._&wt;

proc sort data=mean_&var._&wt;
	by year;

%mend d1;

*Macro to sort all data sets;

%macro sortit(name);

proc sort data=&name;
	by year decile;

%mend sortit;

*Macro to compute within and between variances by year for each location;

%macro doit3(loc, var);

%*Calculate the location specific mean;

proc sort data=gs6300;
	by year decile &var;

proc means data=gs6300 noprint;
	var resid;
	by year decile &var;
	weight wt&var1;
	output out=mean&loc mean=mean;

data stat&loc (keep = mean_yr_&loc year decile &var);
	set mean&loc;
	mean_yr_&loc = mean;


%*Merge the location mean back to the plant level data in gs6300;

proc sort data=stat&loc;
	by year decile &var;

data gs6300;
	merge gs6300 (in=a) stat&loc;
	by year decile &var;
	if a;

%*Compute the squared deviation of the plant resid from location mean resid;

data gs6300;
	set gs6300;
	sqdev&loc = (resid - mean_yr_&loc)*(resid - mean_yr_&loc);

%*Sum up all the squared deviations of the plants in that location 
using the sample weights and create number of plants in each location;

proc means data=gs6300 noprint ;
	by year decile &var;
	output out=sum&loc
	sum(sqdev&loc) = sumsqdev&loc
	sum(num) = num&loc;
	var sqdev&loc num;
	weight wt&var1;

%*Compute a location weight which is equal to the weighted number of plants in the
location divided by the TOTAL weighted number of plants in the entire country;


data sum&loc (keep = year decile &var num&loc sumsqdev&loc);
	set sum&loc;

%*Merge the num&loc and sumsqdev&loc onto stat&loc;

proc sort data=stat&loc;
	by year decile &var;

proc sort data=sum&loc;
	by year decile &var;

data stat&loc;
	merge stat&loc (in=a) sum&loc (in=b);
	by year decile &var;
	if a;

%*Merge numus and mean_yr_tot onto stat&loc;
%*Calculate wt&loc;

proc sort data=gs6300 nodupkey out=&loc.wt (keep = year decile &var numus mean_yr_tot);
	by year decile &var;

data stat&loc;
	merge stat&loc &loc.wt;
	by year decile &var;
	wt&loc = num&loc/numus;
	sumsqdev&loc = sumsqdev&loc / num&loc;

%*Compute within location variance as the weighted sum of sumsqdev&loc using wtloc;

proc means data=stat&loc noprint ;
	by year decile;
	output out=wtnus&loc
	sum(sumsqdev&loc) = var_wtn_&loc;
	var sumsqdev&loc;
	weight wt&loc;

%**Compute between location variance;

%*Take location specific mean and deviate from grand mean;
%*(grand mean is the sample weighted grand mean for the year);
%*Square this deviation;

data stat&loc;
	set stat&loc;
	sqdevbtn&loc = (mean_yr_&loc - mean_yr_tot)*(mean_yr_&loc - mean_yr_tot);

%*Sum up these squared deviations using wt&loc;

proc means data=stat&loc noprint ;
	by year decile;
	output out=btnus&loc
	sum(sqdevbtn&loc) = var_btn_&loc;
	var sqdevbtn&loc;
	weight wt&loc;

%mend doit3;


*Macro to calculate between-county variation;
	
%macro decomp(var1);

data gs6300;
	set gs6300;
	wt&var1 = wt*&var1;

%*Compute grand mean for each year-decile;

proc means data=gs6300 noprint;
	var resid;
	by year decile;
	weight wt&var1;	
	output out=meantot mean=mean;

data meantot (keep = year decile mean_yr_tot);
	set meantot;
	mean_yr_tot = mean;

%*Compute total weighted number of plants in US in each year-decile;

proc means data=gs6300 noprint ;
	by year decile;
	output out=sumus
	sum(num) = numus;
	var num;
	weight wt&var1;

%*Merge grand mean and total weighted number of plants in US to gs6300;

proc sort data=sumus (keep = year decile numus);
	by year decile;

proc sort data=meantot;
	by year decile;

proc sort data=gs6300;
	by year decile;

data gs6300;
	merge gs6300 meantot sumus;
	by year decile;

%*Compute within and between variances by year for each location;

%doit3(cy, cyfstn)
%doit3(bu, bigutil)

%**Cross-check;

data gs6300;
	set gs6300;
	sqdevtot = (resid - mean_yr_tot)*(resid - mean_yr_tot);

proc means data=gs6300 noprint;
	by year decile;
	output out=totus
	sum(sqdevtot) = sum_tot;
	var sqdevtot;
	weight wt&var1;

proc sort data=totus (keep = year decile sum_tot);
	by year decile;

proc sort data=gs6300;
	by year decile;
	
data gs6300;
	merge gs6300 totus;
	by year decile;
	var_tot = sum_tot / numus;

%*Create data set with var_tot and year only;

data vartot;
	set gs6300 (keep = year decile var_tot numus);

proc sort data = vartot nodupkey;
	by year decile;	

%*Create data set with between, within, and total variance for all locations by year-decile;

%*Sort all data sets;

%sortit(wtnuscy) 
%sortit(btnuscy) 
%sortit(wtnusbu) 
%sortit(btnusbu) 

data decomp_&var1 (drop = _TYPE_ _FREQ_);
	merge wtnuscy btnuscy wtnusbu btnusbu vartot;
	by year decile;
	
	var_est_cy = (var_btn_cy + var_wtn_cy);
	var_est_bu = (var_btn_bu + var_wtn_bu);


proc print data=decomp_&var1;
	var var_tot
	    var_est_cy var_est_bu;
	title1 'Compare these';

proc print data=decomp_&var1;
	var year var_wtn_cy var_btn_cy var_wtn_bu var_btn_bu 
		var_tot ;
	title1 "decomp_&var1 data set"; 

%mend decomp;

*Macro to create wide dataset of between-county variation;

%macro wide();

%do i=1 %to 10;

data d&i (keep = year std_btn_bu_&i std_btn_cy_&i std_wtn_bu_&i std_wtn_cy_&i std_tot_&i);
	set decomp_pe;
	if decile = &i;

	std_btn_bu_&i = std_btn_bu;
	std_btn_cy_&i = std_btn_cy;
	std_wtn_bu_&i = std_wtn_bu;
	std_wtn_cy_&i = std_wtn_cy;
	std_tot_&i = std_tot;

proc sort data=d&i;
	by year;
%end;

%mend wide;

*Macro to generate data for implied Santee Cooper schedule (figure 6);

%macro doit_sc(type);

	*Large Light and Power Schedule;

	dm300_&type = demand_&type - 300;
	if dm300_&type < 0 then dm300_&type = 0;

	dm1000_&type = demand_&type - 1000;
	if dm1000_&type < 0 then dm1000_&type = 0;

	*Calculate annual expenditures (in 1000$);

	exp_ll_&type = (12/1000)*(1200 + 10760 + (dm1000_&type * 10.76) + (kwh_month * 0.0219));

	*Calculate LL price;

	price_ll_&type = exp_ll_&type / pe;

	log_pll_&type = log(price_ll_&type);

	*Commercial - General Service (GN);

	exp_gn_&type = (12/1000) * (6.85 + (kwh_month*0.0656));

	price_gn_&type = exp_gn_&type / pe;

	log_pgn_&type = log(price_gn_&type);

	*Commercial - Medium General Service (GS);

	exp_gs_&type = (12/1000) * (16.15 + (demand_&type * 11.85) + (kwh_month*0.0260));

	price_gs_&type = exp_gs_&type / pe;

	log_pgs_&type = log(price_gs_&type);

	*Commercial - Large General Service (GL);

	exp_gl_&type = (12/1000) * (24 + (3960 + (dm300_&type * 13.2)) + (kwh_month*0.0232));

	price_gl_&type = exp_gl_&type / pe;

	log_pgl_&type = log(price_gl_&type);

	*Determine best price schedule for each;

	best_price_&type = 0;
	best_schedule_&type = 0;

	*Commercial General Service;

	if log_pgn_&type < log_pll_&type and 
	   log_pgn_&type < log_pgs_&type and 
	   log_pgn_&type < log_pgl_&type then do;

		best_schedule_&type = 1;
		best_price_&type = log_pgn_&type;	

	end;
	*Commercial Medium General Service;

	else if log_pgs_&type < log_pll_&type and 
	   log_pgs_&type < log_pgn_&type and 
	   log_pgs_&type < log_pgl_&type then do;

		best_schedule_&type = 2;
		best_price_&type = log_pgs_&type;	

	end;
	*Commercial Large General Service;

	if log_pgl_&type < log_pll_&type and 
	   log_pgl_&type < log_pgs_&type and 
	   log_pgl_&type < log_pgn_&type then do;

		best_schedule_&type = 3;
		best_price_&type = log_pgl_&type;	

	end;
	*Industrial Large Light and Power;

	if log_pll_&type < log_pgn_&type and 
	   log_pll_&type < log_pgs_&type and 
	   log_pll_&type < log_pgl_&type then do;

		best_schedule_&type = 4;
		best_price_&type = log_pll_&type;	

	end;
	
%mend doit_sc;


***START PROGRAM***;

data gs6300;
	set elec.gs6300 (keep = ppn bigutil year lpe lpe_gdp logpe wt wttvs wtpe
		centile cyfstn fipsst grid flag5_pw tvs);
	pe5 = logpe * logpe * logpe * logpe * logpe;
	pe4 = logpe * logpe * logpe * logpe;
	pe3 = logpe * logpe * logpe;
	pe2 = logpe * logpe;
	
	*Drop partial year obs;
	if flag5_pw ne 1;

proc sort data=gs6300;
	by year bigutil;

*Calculate mean lpe_gdp by year;

proc univariate data=gs6300 noprint;
	by year;
	weight wttvs;
	var lpe_gdp;
	output out=meangs_yr mean=mean;

data meangs_yr (keep = year mean_yr_lpe_gdp);
	set meangs_yr;
	mean_yr_lpe_gdp = mean;

proc sort data=meangs_yr;
	by year;

*Run regression with big utility effects;

%doit1(1963)
%doit1(1967)
%doit1(1972)
%doit1(1973)
%doit1(1974)
%doit1(1975)
%doit1(1976)
%doit1(1977)
%doit1(1978)
%doit1(1979)
%doit1(1980)
%doit1(1981)
%doit1(1982)
%doit1(1983)
%doit1(1984)
%doit1(1985)
%doit1(1986)
%doit1(1987)
%doit1(1988)
%doit1(1989)
%doit1(1990)
%doit1(1991)
%doit1(1992)
%doit1(1993)
%doit1(1994)
%doit1(1995)
%doit1(1996)
%doit1(1997)
%doit1(1998)
%doit1(1999)
%doit1(2000)

data outurall;
	set outur1963 outur1967 outur1972 outur1973
	outur1974 outur1975 outur1976 outur1977
	outur1978 outur1979 outur1980 outur1981
	outur1982 outur1983 outur1984 outur1985
	outur1986 outur1987 outur1988 outur1989
	outur1990 outur1991 outur1992 outur1993
	outur1994 outur1995 outur1996 outur1997
	outur1998 outur1999 outur2000;

proc sort data=outurall;
	by year;	 

data outurall;
	merge outurall (in=a) meangs_yr (in=b);
	by year;
	if a;

*Calculate mean phat by year;

proc univariate data=outurall noprint;
	by year;
	var phat;
	weight wttvs;
	output out=mean_phat_yr mean=mean;

data mean_phat_yr (keep = year mean_phat_yr);
	set mean_phat_yr;
	mean_phat_yr = mean;

proc sort data=mean_phat_yr;
	by year;

data npouturall;
	merge outurall mean_phat_yr;
	by year;

	phat0 = (coeff_pe5*pe5) + (coeff_pe4*pe4) + (coeff_pe3*pe3)
			+ (coeff_pe2*pe2) + (coeff_logpe*logpe);
	phat1 = coeff_intercept + (coeff_pe5*pe5) + (coeff_pe4*pe4) + (coeff_pe3*pe3)
			+ (coeff_pe2*pe2) + (coeff_logpe*logpe);

	phat2 = phat - phat0; *Equal to the constant term in phat;

	*Create plant-level elasticity;
	elas = coeff_logpe + (2*coeff_pe2*logpe) + (3*coeff_pe3*pe2)
			+ (4*coeff_pe4*pe3) + (5*coeff_pe5*pe4);


*Calculate mean phat1 and phat2 by year;

proc summary data=npouturall noprint;
	by year;
	var phat1 phat2;
	weight wttvs;
	output out=mean_phats_yr 
	mean(phat1) = mean_phat1_yr
	mean(phat2) = mean_phat2_yr;

data mean_phats_yr (keep = year mean_phat1_yr mean_phat2_yr);
	set mean_phats_yr;

proc sort data=mean_phats_yr;
	by year;

data npouturall;
	merge npouturall mean_phats_yr;
	by year;
	int_adj = mean_phat2_yr;

	phat_fin = int_adj + (coeff_pe5*pe5) + (coeff_pe4*pe4) + (coeff_pe3*pe3)
			+ (coeff_pe2*pe2) + (coeff_logpe*logpe);

data outcoeff;
	set npouturall (keep = year int_adj coeff_:);

proc sort data=outcoeff nodup;
	by year int_adj coeff_pe5 coeff_pe4 coeff_pe3 coeff_pe2 coeff_logpe;

*These are the coefficients needed to generate the curves in the lower panel (with utility fixed effects) of Figure 4;

proc export data=outcoeff outfile="csv/figure4.csv"
	dbms=csv replace;

*Check mean of phat_fin by year;

proc univariate data=npouturall noprint;
	by year;
	weight wttvs;
	var phat_fin;
	output out=mean_phat_fin_yr mean=mean;

data mean_phat_fin_yr (keep = year mean_phat_fin_yr);
	set mean_phat_fin_yr;
	mean_phat_fin_yr = mean;

data means_all;
	merge mean_phat_fin_yr (in=a) meangs_yr (in=b) mean_phat_yr (in=c);
	by year;
	if a;

proc print data=means_all;
	title1 "CHECK:  means_all";
run;

proc datasets lib=work;
	delete exutil: glm: glmest: mean_: outcoeff outur: means_all;
run;
quit;

***Regression with no big utility fixed effects;


%doit2(1963)
%doit2(1967)
%doit2(1972)
%doit2(1973)
%doit2(1974)
%doit2(1975)
%doit2(1976)
%doit2(1977)
%doit2(1978)
%doit2(1979)
%doit2(1980)
%doit2(1981)
%doit2(1982)
%doit2(1983)
%doit2(1984)
%doit2(1985)
%doit2(1986)
%doit2(1987)
%doit2(1988)
%doit2(1989)
%doit2(1990)
%doit2(1991)
%doit2(1992)
%doit2(1993)
%doit2(1994)
%doit2(1995)
%doit2(1996)
%doit2(1997)
%doit2(1998)
%doit2(1999)
%doit2(2000)

data outurall;
	set outur1963 outur1967 outur1972 outur1973
	outur1974 outur1975 outur1976 outur1977
	outur1978 outur1979 outur1980 outur1981
	outur1982 outur1983 outur1984 outur1985
	outur1986 outur1987 outur1988 outur1989
	outur1990 outur1991 outur1992 outur1993
	outur1994 outur1995 outur1996 outur1997
	outur1998 outur1999 outur2000;

proc sort data=outurall;
	by year;	 

data npouturall_nf;
	merge outurall (in=a) meangs_yr (in=b);
	by year;
	if a;

	elas_nf = coeff_logpe + (2*coeff_pe2*logpe) + (3*coeff_pe3*pe2)
		+ (4*coeff_pe4*pe3) + (5*coeff_pe5*pe4);

	phat_fin = phat;

data outcoeff;
        set npouturall_nf (keep = year coeff_:);

proc sort data=outcoeff nodup;
        by year coeff_intercept coeff_pe5 coeff_pe4 coeff_pe3 coeff_pe2 coeff_logpe;

*These are the coefficients needed to generate the curves in the upper panel (without utility fixed effects) of Figure 4;

proc export data=outcoeff outfile="csv/figure4_nf.csv"
        dbms=csv replace;
run;

proc datasets lib=work;
	delete exutil: glm: glmest: mean_: outcoeff outur: means_all;
run;
quit;

**Create elasticity datasets for later use in this program;

data outurall;
	set npouturall;

data outurall_nf;
	set npouturall_nf;

**Create elasticity datasets for later use in other programs;

data elec.outurall;
	set npouturall;

data elec.outurall_nf;
	set npouturall_nf;


*Create weighted mean elasticities;

%d1(outurall, tvs, elas)
%d1(outurall, pe, elas)
%d1(outurall_nf, tvs, elas_nf)
%d1(outurall_nf, pe, elas_nf)

data elas;
	merge mean_elas_tvs mean_elas_pe mean_elas_nf_tvs mean_elas_nf_pe;
	by year;

proc print data=elas;
	title1 "Elasticity Dataset";

proc export data=elas outfile="csv/figure5.csv"	
	dbms=csv replace;

run;

proc datasets lib=work;
	delete gs6300;
run;
quit;

**Figure 3;

*Create shipments-weighted time invariant decile cutoffs;
*Calculate by year and then take simple average so all years get the same weight;

data gs6300;
	set elec.gs6300 (keep = ppn year logpe wt tvs wttvs lpe pe ee lpe_gdp wtpe bigutil cyfstn flag5_pw id te);
	mgit = 1;
	*Drop part-year obs;
	if flag5_pw ne 1;

proc sort data=gs6300;
	by year;

proc univariate data=gs6300 noprint;
	by year;
	var logpe;
	weight wttvs;
	output out=deciles pctlpre=p pctlpts=10 20 30 40 50 60 70 80 90;

data deciles (keep = year p10 p20 p30 p40 p50 p60 p70 p80 p90);
	set deciles;

*Take simple average for all years;

proc summary data=deciles noprint;
	var p10 p20 p30 p40 p50 p60 p70 p80 p90;
	output out=deciles_avg
	mean(p10) = mp10
	mean(p20) = mp20
	mean(p30) = mp30
	mean(p40) = mp40
	mean(p50) = mp50
	mean(p60) = mp60
	mean(p70) = mp70
	mean(p80) = mp80
	mean(p90) = mp90;

data deciles_avg;
	set deciles_avg;

data gs6300;
	if _N_ = 1 then set deciles_avg;
	set gs6300;
	
	decile_pool = .;
	if logpe < mp10 then decile_pool = 1;
	else if logpe ge mp10 and logpe < mp20 then decile_pool = 2;
	else if logpe ge mp20 and logpe < mp30 then decile_pool = 3;
	else if logpe ge mp30 and logpe < mp40 then decile_pool = 4;
	else if logpe ge mp40 and logpe < mp50 then decile_pool = 5;
	else if logpe ge mp50 and logpe < mp60 then decile_pool = 6;
	else if logpe ge mp60 and logpe < mp70 then decile_pool = 7;
	else if logpe ge mp70 and logpe < mp80 then decile_pool = 8;
	else if logpe ge mp80 and logpe < mp90 then decile_pool = 9;
	else if logpe ge mp90 then decile_pool = 10;

proc freq data=gs6300;
	tables decile_pool;
	title1 "Check decile_pool for all years";

*Open dataset with results of 5th order regression (no utility fixed effects);

data fifth;
	set exu027ww.npouturall_nf (keep = resid ppn year);
	abs_resid = abs(resid);

*Merge this to pqem dataset with deciles;

proc sort data=fifth;
	by ppn year;

proc sort data=gs6300;
	by ppn year;

data gs6300;
	merge gs6300 (in=a) fifth (in=b);
	by ppn year;

	check_merge = 0;
	if a and b then check_merge = 1;
	else if a and not b then check_merge = 2;
	else if b and not a then check_merge = 3;

proc freq data=gs6300;
	tables check_merge / missing;
	title1 "Check merge of gs6300 and fifth";

*Calculate purchase-weighted between-county variation;

data gs6300;
	set gs6300 (drop = check_merge);
	decile = decile_pool;

proc sort data=gs6300;
	by year decile;

data gs6300;
	set gs6300;
	wt100=wt*100;
	num=1;

*Create dataset of between-county variation (purchase-weighted);

%decomp(pe);


*Create wide dataset of between-county variation;

data decomp_pe;
	set decomp_pe;

	%*Create standard deviations;
	std_btn_bu = sqrt(var_btn_bu);
	std_btn_cy = sqrt(var_btn_cy);
	std_wtn_bu = sqrt(var_wtn_bu);
	std_wtn_cy = sqrt(var_wtn_cy);
	std_tot = sqrt(var_tot);

%wide()

data gr_decomp_pe (keep = year std_btn_cy:);
	merge d1 d2 d3 d4 d5 d6 d7 d8 d9 d10;
	by year;

proc export data=gr_decomp_pe outfile="csv/figure3.csv"
	dbms=csv replace;

run;

**Figure 6;

*Create dataset of fake PE values;

data cpe (keep = cpe);
	set elec.gs6300 (keep = year);
	if _N_ le 10000;
	cpe = _N_ * 1000;

data one;
	set cpe;
	if _N_ le 1000;
	cpe = _N_;

data cpe;
	set one cpe;
	label cpe = 'Fake PE value - not real data';

	if cpe ne 1;

*Add a few higher obs;

data cpe2;
	set cpe;

	if _N_ le 7;

	if _N_ = 1 then cpe = 11000000;
	if _N_ = 2 then cpe = 12000000;
	if _N_ = 3 then cpe = 13000000;
	if _N_ = 4 then cpe = 14000000;
	if _N_ = 5 then cpe = 15000000;
	if _N_ = 6 then cpe = 16000000;
	if _N_ = 7 then cpe = 17000000;

data cpe;
	set cpe cpe2;

proc sort data=cpe;
	by cpe;

data cpe;
	set cpe;

	pe = cpe;

	*Create kW of demand;
	*Recall PE is in 1000 kWh and is the annual quantity of purchased electricity;

	kwh_month = (pe / 12) * 1000;
	mwh_month = pe / 12;
	lmwh_month = log(mwh_month);
	lkwh_month = log(kwh_month);
	logpe = log(pe);

	*Demand-Consumption Schedules 1-4: kW of demand are equal to 2-5 times MWh per month;

	demand_s1 = mwh_month * 2;
	demand_s2 = mwh_month * 3;
	demand_s3 = mwh_month * 4;
	demand_s4 = mwh_month * 5;

	*Demand-consumption schedule 5: Variable based on 1986 EIA-213 numbers;

	mult = 0;
	l30 = log(30);
	l2500 = log(2500);
	diff = l2500 - l30;

	if mwh_month < 30 then mult = 5;
	else if mwh_month ge 30 and mwh_month < 2500 then 
		mult = 5 - (((lmwh_month - l30)/diff) * (5-2));
	else if mwh_month ge 2500 then mult = 2;

	demand_s5 = mwh_month * mult;

	*Demand-consumption schedule 6: Variable based on 1986 EIA-213 numbers;

	mult2 = 0;

	if mwh_month < 30 then mult2 = 2.5;
	else if mwh_month ge 30 and mwh_month < 2500 then 
		mult2 = 2.5 - (((lmwh_month - l30)/diff) * (2.5-2));
	else if mwh_month ge 2500 then mult2 = 2;

	demand_s6 = mwh_month * mult2;
	
	*Demand-consumption schedule 7:  Variable Load factor of 25% to 70%;

	loadf = 0;

	if mwh_month < 30 then loadf = 0.25;
	else if mwh_month ge 30 and mwh_month < 2500 then
		loadf = 0.25 + (((lmwh_month - l30)/diff) * (0.7-0.25));
	else if mwh_month ge 2500 then loadf = 0.70;


	demand_s7 = (1000/730) * (mwh_month/loadf);

	*Demand-consumption schedule 8:  Fixed Load factor of 50%;

	loadf_fix = 0.5;

	demand_s8 = (1000/730) * (mwh_month/loadf);

	*Macro to create numbers for eight different schedules;

	%doit_sc(s1)
	%doit_sc(s2)
	%doit_sc(s3)
	%doit_sc(s4)
	%doit_sc(s5)
	%doit_sc(s6)
	%doit_sc(s7)
	%doit_sc(s8)

proc sort data=cpe;
	by logpe;

*Check;

proc freq data=cpe;
	tables best_schedule_s1 best_schedule_s2 best_schedule_s3 best_schedule_s4 best_schedule_s5
		best_schedule_s6 best_schedule_s7 best_schedule_s8;
	title1 "Check Best Schedule";

proc print data=cpe (obs=2000);
	var mwh_month mult mult2 loadf;
	title1 "Check log linear multipliers";

data cpe (drop = logpe);
	set cpe (keep = logpe best_price_s8);
	logpe_r = round(logpe, .1);

proc sort data=cpe nodupkey;
	by logpe_r;

*These are the data for the Implied Santee Cooper Schedule in Figure 6;
*The coefficients needed to generate the Fifth-Order Polynomial Fit to 2000 PQEM Data curve are in figure4.csv;

proc export data=cpe outfile="csv/figure6.csv"
	dbms=csv replace;

run;
